package com.yzq.os.spider.v.service.http;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.zip.GZIPInputStream;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Service;

import com.yzq.os.spider.v.Constants;
import com.yzq.os.spider.v.util.Encode;
import com.yzq.os.spider.v.util.Regex;

/**
 * HTTP request service.
 *
 * <p>Wraps one shared {@link DefaultHttpClient} (backed by a
 * {@link ThreadSafeClientConnManager}) and offers GET/POST helpers that return
 * the response body as a decoded string, plus a raw-bytes GET.
 *
 * <p>All text helpers return {@code null} when the server does not answer with
 * a usable {@code 200 OK} body; transport failures are logged and rethrown.
 *
 * @author 苑志强 (xingyu_yzq@163.com)
 */
@Service
public final class HttpClientService {

	private static final Logger logger = Logger.getLogger(HttpClientService.class);

	/** Default request headers installed on the shared client. */
	private static final List<Header> DEFAULT_REQUEST_HEADERS = new ArrayList<Header>();

	/** User-Agent strings; one is picked at random for every request. */
	private static final List<String> USER_AGENTS = new ArrayList<String>();

	/** Pattern used to extract the charset declared in an HTML {@code <meta>} tag. */
	private static final String REG_CHARSET = "<meta[^>]*?charset=([a-z|A-Z|0-9]*[\\-]*[0-9]*)[\\s|\\S]*";

	private static final int MAX_TOTAL_CONNECTIONS = 40;
	private static final int MAX_ROUTE_CONNECTIONS = 3;
	private static final int CONNECT_TIMEOUT = 30000;
	private static final int READ_TIMEOUT = 30000;

	/** Upper bound on redirects followed by hand in {@code doGetRequest}, so a redirect loop terminates. */
	private static final int MAX_MANUAL_REDIRECTS = 5;

	/** Shared generator; {@link Random} is safe for concurrent use and need not be re-created per call. */
	private static final Random RANDOM = new Random();

	private static final DefaultHttpClient httpClient;

	static {
		DEFAULT_REQUEST_HEADERS.add(new BasicHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
		DEFAULT_REQUEST_HEADERS.add(new BasicHeader("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3"));
		DEFAULT_REQUEST_HEADERS.add(new BasicHeader("Accept-Language", "zh-CN,zh;q=0.8"));
		DEFAULT_REQUEST_HEADERS.add(new BasicHeader("Connection", "keep-alive"));

		USER_AGENTS.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1");
		USER_AGENTS.add("Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24");
		USER_AGENTS.add("Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)");

		ThreadSafeClientConnManager connManager = new ThreadSafeClientConnManager();
		connManager.setMaxTotal(MAX_TOTAL_CONNECTIONS);
		connManager.setDefaultMaxPerRoute(MAX_ROUTE_CONNECTIONS);

		httpClient = new DefaultHttpClient(connManager);

		httpClient.setHttpRequestRetryHandler(new DefaultHttpRequestRetryHandler(5, true));
		httpClient.setRedirectStrategy(new UTFRedirectStrategy());
		httpClient.getParams().setParameter(HttpProtocolParams.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
		httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, CONNECT_TIMEOUT);
		httpClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, READ_TIMEOUT);
		httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY);
		httpClient.getParams().setParameter(ClientPNames.DEFAULT_HEADERS, DEFAULT_REQUEST_HEADERS);
		if (Constants.USE_PROXY_SERVER) {
			HttpHost proxy = new HttpHost(Constants.PROXY_SERVER_HOST, Constants.PROXY_SERVER_PORT);
			httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
		}
	}

	/**
	 * Fetches {@code fullURL} with GET, using {@link Encode#GB18030} as the
	 * fallback charset and no extra headers.
	 *
	 * @param fullURL complete request URL
	 * @param isGzip  whether to send an {@code Accept-Encoding} header
	 * @return the decoded body, or {@code null} if no usable 200 response was obtained
	 * @throws Exception on transport or URL errors
	 */
	public String doGetRequest(String fullURL, boolean isGzip) throws Exception {
		return doGetRequest(fullURL, Encode.GB18030, isGzip, null);
	}

	/**
	 * Builds a GET URL from {@code baseUrl} and {@code params} (encoded with
	 * {@code urlEncode}) and fetches it.
	 *
	 * @param baseUrl   URL to which the query string is appended
	 * @param params    query parameters; may be {@code null} or empty
	 * @param urlEncode encoding used for the query string
	 * @param isGzip    whether to send an {@code Accept-Encoding} header
	 * @return the decoded body, or {@code null} if no usable 200 response was obtained
	 * @throws Exception on transport or URL errors
	 */
	public String doGetRequest(String baseUrl, List<NameValuePair> params, Encode urlEncode, boolean isGzip) throws Exception {
		String url = makeGetURL(baseUrl, params, urlEncode);
		return doGetRequest(url, isGzip);
	}

	/**
	 * Fetches {@code fullURL} with GET, using {@link Encode#GB18030} as the
	 * fallback charset and applying {@code resetHeaders} on top of the defaults.
	 *
	 * @param fullURL      complete request URL
	 * @param isGzip       whether to send an {@code Accept-Encoding} header
	 * @param resetHeaders headers that override the generated ones; may be {@code null}
	 * @return the decoded body, or {@code null} if no usable 200 response was obtained
	 * @throws Exception on transport or URL errors
	 */
	public String doGetRequest(String fullURL, boolean isGzip, List<Header> resetHeaders) throws Exception {
		return doGetRequest(fullURL, Encode.GB18030, isGzip, resetHeaders);
	}

	/**
	 * Fetches {@code fullURL} with GET and decodes the body.
	 *
	 * <p>A 301/302 that reaches this method (i.e. one the client's redirect
	 * strategy did not already follow) is followed by hand, at most
	 * {@value #MAX_MANUAL_REDIRECTS} times, keeping the caller's fallback
	 * charset and headers.
	 *
	 * @param fullURL       complete request URL
	 * @param defaultEncode charset used when neither the response headers nor the page declare one
	 * @param isGzip        whether to send an {@code Accept-Encoding} header
	 * @param resetHeaders  headers that override the generated ones; may be {@code null}
	 * @return the decoded body, or {@code null} if no usable 200 response was obtained
	 * @throws Exception on transport or URL errors (logged before being rethrown)
	 */
	public String doGetRequest(String fullURL, Encode defaultEncode, boolean isGzip, List<Header> resetHeaders) throws Exception {
		return executeGet(fullURL, defaultEncode, isGzip, resetHeaders, MAX_MANUAL_REDIRECTS);
	}

	/**
	 * Performs one GET and, if needed, recurses for a manual redirect.
	 *
	 * @param redirectsLeft how many more manual redirects may still be followed
	 */
	private String executeGet(String fullURL, Encode defaultEncode, boolean isGzip, List<Header> resetHeaders, int redirectsLeft) throws Exception {
		HttpGet get = new HttpGet(fullURL);
		setCustomRequestHeaders(get, fullURL, isGzip, resetHeaders);
		Header[] requestHeaders = null;
		Header[] responseHeaders = null;
		String html = null;
		String redirectTarget = null;
		// True only once the body stream has been read and closed, which hands the connection back to the pool.
		boolean bodyConsumed = false;
		try {
			requestHeaders = get.getAllHeaders();
			HttpResponse response = httpClient.execute(get);
			responseHeaders = response.getAllHeaders();
			int statusCode = response.getStatusLine().getStatusCode();
			if (statusCode == HttpStatus.SC_OK) {
				HttpEntity entity = response.getEntity();
				if (entity != null) {
					html = getResponseString(entity, isGzip, defaultEncode);
					bodyConsumed = true;
				} else {
					logger.error("Entity:[" + entity + "]" + describeGet(fullURL, requestHeaders, responseHeaders));
				}
			} else if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
				logger.info("StatusCode:[" + statusCode + "] fullURL:[" + fullURL + "]");
				Header locationHeader = response.getFirstHeader("Location");
				String location = locationHeader != null ? locationHeader.getValue() : null;
				logger.error("StatusCode:[" + statusCode + "] fullURL:[" + fullURL + "] location:[" + location + "]");
				if (StringUtils.isNotBlank(location)) {
					// Location may be relative; resolve it against the URL that was just requested.
					redirectTarget = get.getURI().resolve(location.trim()).toString();
				}
			} else {
				logger.error("StatusCode:[" + statusCode + "]" + describeGet(fullURL, requestHeaders, responseHeaders));
			}
		} catch (ClientProtocolException e) {
			logger.error("ClientProtocolException" + describeGet(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} catch (IOException e) {
			logger.error("IOException" + describeGet(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} catch (Exception e) {
			logger.error("Exception" + describeGet(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} finally {
			// Every path that did not read the body must abort, otherwise the pooled connection leaks.
			if (!bodyConsumed) {
				get.abort();
			}
		}

		if (redirectTarget == null) {
			return html;
		}
		if (redirectsLeft <= 0) {
			logger.error("Too many redirects, giving up. fullURL:[" + fullURL + "] location:[" + redirectTarget + "]");
			return null;
		}
		// The connection of this request is already released, so the follow-up cannot starve the per-route pool.
		return executeGet(redirectTarget, defaultEncode, isGzip, resetHeaders, redirectsLeft - 1);
	}

	/**
	 * Sends a form-encoded POST to {@code fullURL} and decodes the body.
	 *
	 * @param fullURL       complete request URL
	 * @param defaultEncode charset used when neither the response headers nor the page declare one
	 * @param isGzip        whether to send an {@code Accept-Encoding} header
	 * @param resetHeaders  headers that override the generated ones; may be {@code null}
	 * @param parameters    form fields; {@code null} is treated as an empty form
	 * @param encoding      charset for encoding the form; blank means {@code UTF-8}
	 * @return the decoded body, or {@code null} if the response was not a 200 with a body
	 * @throws Exception on transport, URL or encoding errors (logged before being rethrown)
	 */
	public String doPostRequest(String fullURL, Encode defaultEncode, boolean isGzip, List<Header> resetHeaders, List<NameValuePair> parameters, String encoding) throws Exception {
		String html = null;
		HttpPost post = new HttpPost(fullURL);
		Header[] requestHeaders = null;
		Header[] responseHeaders = null;
		// True only once the body stream has been read and closed, which hands the connection back to the pool.
		boolean bodyConsumed = false;
		try {
			setCustomRequestHeaders(post, fullURL, isGzip, resetHeaders);
			List<NameValuePair> formFields = parameters != null ? parameters : new ArrayList<NameValuePair>();
			post.setEntity(new UrlEncodedFormEntity(formFields, StringUtils.isNotBlank(encoding) ? encoding : "UTF-8"));
			requestHeaders = post.getAllHeaders();
			HttpResponse response = httpClient.execute(post);
			responseHeaders = response.getAllHeaders();
			int statusCode = response.getStatusLine().getStatusCode();
			if (statusCode == HttpStatus.SC_OK) {
				HttpEntity entity = response.getEntity();
				if (entity != null) {
					html = getResponseString(entity, isGzip, defaultEncode);
					bodyConsumed = true;
				} else {
					logger.error("Post Entity is null." + describePost(fullURL, requestHeaders, responseHeaders));
				}
			} else {
				logger.error("statusCode !=200 statusCode:[" + statusCode + "]." + describePost(fullURL, requestHeaders, responseHeaders));
			}
		} catch (URISyntaxException e) {
			logger.error("URISyntaxException." + describePost(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} catch (UnsupportedEncodingException e) {
			logger.error("UnsupportedEncodingException." + describePost(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} catch (ClientProtocolException e) {
			logger.error("ClientProtocolException." + describePost(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} catch (IOException e) {
			logger.error("IOException." + describePost(fullURL, requestHeaders, responseHeaders), e);
			throw e;
		} finally {
			// Also covers unchecked exceptions, which the catch clauses above do not see.
			if (!bodyConsumed) {
				post.abort();
			}
		}
		return html;
	}

	/**
	 * Fetches {@code fullURL} with GET and returns whatever
	 * {@code ByteResponseHandler} produces from the response.
	 *
	 * @param fullURL complete request URL
	 * @return the bytes produced by the response handler
	 * @throws Exception on any failure; the request is aborted first
	 */
	public byte[] getResponseBytes(String fullURL) throws Exception {
		byte[] bytes = null;
		HttpGet getMethod = new HttpGet(fullURL);
		getMethod.getParams().setParameter(CoreProtocolPNames.USER_AGENT, getRandomUserAgent());
		try {
			ResponseHandler<byte[]> handler = new ByteResponseHandler();
			bytes = httpClient.execute(getMethod, handler);
		} catch (Exception e) {
			getMethod.abort();
			throw e;
		}
		return bytes;
	}

	/** Builds the log suffix (URL plus request/response headers) used by the GET path. */
	private static String describeGet(String fullURL, Header[] requestHeaders, Header[] responseHeaders) {
		return " Url[" + fullURL + "],requestHeaders:[" + ArrayUtils.toString(requestHeaders) + "], responseHeaders:[" + ArrayUtils.toString(responseHeaders) + "]";
	}

	/** Builds the log suffix (URL plus request/response headers) used by the POST path. */
	private static String describePost(String fullURL, Header[] requestHeaders, Header[] responseHeaders) {
		return " fullUrl:[" + fullURL + "],requestHeaders:[" + ArrayUtils.toString(requestHeaders) + "],responseHeaders:[" + ArrayUtils.toString(responseHeaders) + "]";
	}

	/**
	 * Appends {@code params}, URL-encoded with {@code urlEncode}, to {@code baseUrl}.
	 * Uses {@code ?} when {@code baseUrl} has no query string yet and {@code &}
	 * when it already has one.
	 */
	private String makeGetURL(String baseUrl, List<NameValuePair> params, Encode urlEncode) throws ParseException, UnsupportedEncodingException, IOException {
		if (CollectionUtils.isEmpty(params)) {
			return baseUrl;
		}
		String queryString = EntityUtils.toString(new UrlEncodedFormEntity(params, urlEncode.getEncode()));
		String separator;
		if (!StringUtils.contains(baseUrl, "?")) {
			separator = "?";
		} else if (baseUrl.endsWith("?") || baseUrl.endsWith("&")) {
			separator = "";
		} else {
			separator = "&";
		}
		return baseUrl + separator + queryString;
	}

	/**
	 * Sets the per-request headers: {@code Host}/{@code Origin} derived from
	 * {@code fullURL}, optional {@code Accept-Encoding}, a random
	 * {@code User-Agent}, and finally the caller's {@code resetHeaders}, which
	 * override any header of the same name.
	 */
	private void setCustomRequestHeaders(HttpRequestBase request, String fullURL, boolean isGzip, List<Header> resetHeaders) throws URISyntaxException {
		HttpHost host = URIUtils.extractHost(new URI(fullURL));
		if (host != null && StringUtils.isNotBlank(host.getHostName())) {
			// Host must keep an explicit port; Origin is scheme://host[:port], not a bare host name.
			request.setHeader("Host", host.toHostString());
			request.setHeader("Origin", host.toURI());
		}
		if (isGzip) {
			// Advertise only gzip: it is the only content coding getResponseString can decode.
			request.setHeader("Accept-Encoding", "gzip");
		}
		request.setHeader("User-Agent", getRandomUserAgent());
		if (CollectionUtils.isNotEmpty(resetHeaders)) {
			for (Header header : resetHeaders) {
				request.setHeader(header);
			}
		}
	}

	/** Returns the value of the entity's {@code Content-Encoding} header, or {@code null} if absent. */
	private String getEncoding(HttpEntity entity) {
		Header header = entity.getContentEncoding();
		return header != null ? header.getValue() : null;
	}

	/**
	 * Reads the whole entity, gunzips it when the response says it is
	 * gzip-encoded, and decodes it to a string.
	 *
	 * <p>Charset resolution order: response {@code Content-Type} charset, then
	 * the charset found by {@link #REG_CHARSET} in the page, then
	 * {@code defaultEncode}. If the resolved charset is not supported by the
	 * JVM, {@code defaultEncode} is used instead of failing the fetch.
	 *
	 * @param isGzip unused for decoding; decompression is driven solely by the
	 *               response's {@code Content-Encoding}
	 */
	private String getResponseString(HttpEntity entity, boolean isGzip, Encode defaultEncode) throws IOException, UnsupportedEncodingException {
		String html = null;
		InputStream is = null;
		GZIPInputStream gzis = null;
		try {
			is = entity.getContent();
			String contentEncoding = getEncoding(entity);
			boolean encodingGzip = StringUtils.equalsIgnoreCase(contentEncoding, "gzip") || StringUtils.equalsIgnoreCase(contentEncoding, "x-gzip");
			byte[] bytes;
			if (encodingGzip) {
				gzis = new GZIPInputStream(is);
				bytes = IOUtils.toByteArray(gzis);
			} else {
				bytes = IOUtils.toByteArray(is);
			}
			String charset = EntityUtils.getContentCharSet(entity);
			if (StringUtils.isBlank(charset)) {
				// No charset in the headers: look for a <meta ... charset=...> declaration in the page.
				String tmpHtml = new String(bytes, "UTF-8");
				charset = Regex.matchSRowSField(tmpHtml, REG_CHARSET, false);
				if (StringUtils.isBlank(charset)) {
					charset = defaultEncode.getEncode();
				}
			}
			try {
				html = new String(bytes, charset);
			} catch (UnsupportedEncodingException e) {
				// A bogus charset declared by the server or page should not make the whole fetch fail.
				String fallback = defaultEncode.getEncode();
				logger.warn("Unsupported charset:[" + charset + "], falling back to:[" + fallback + "]");
				html = new String(bytes, fallback);
			}
		} finally {
			IOUtils.closeQuietly(gzis);
			IOUtils.closeQuietly(is);
		}
		return html;
	}

	/** Picks one entry of {@link #USER_AGENTS} uniformly at random. */
	private String getRandomUserAgent() {
		int minSubscript = 0;
		int maxSubscript = USER_AGENTS.size() - 1;
		int num = getRandomNum(minSubscript, maxSubscript);
		return USER_AGENTS.get(num);
	}

	/**
	 * Returns a uniformly distributed integer in {@code [min, max]}, both ends inclusive.
	 *
	 * @throws IllegalArgumentException if {@code max < min}
	 */
	private int getRandomNum(int min, int max) {
		// nextInt(bound) is exclusive of bound, hence the +1 so that max itself can be drawn.
		return min + RANDOM.nextInt(max - min + 1);
	}

	/**
	 * Splits the query string of {@code url} into name/value pairs.
	 *
	 * <p>Everything after the first {@code ?} is taken as the query; a string
	 * without {@code ?} is treated as a query string itself. Each
	 * {@code &}-separated part is split at its first {@code =}, so values may
	 * contain {@code =}. A part without {@code =} or with nothing after it
	 * yields an empty value; a part with an empty name is skipped. No
	 * URL-decoding is performed.
	 *
	 * @param url a URL or raw query string; may be blank
	 * @return the parsed pairs in order of appearance; never {@code null}
	 */
	public static List<NameValuePair> getNameValuePairForUrl(String url) {
		List<NameValuePair> pairs = new ArrayList<NameValuePair>();
		if (StringUtils.isBlank(url)) {
			return pairs;
		}
		String query = StringUtils.contains(url, "?") ? StringUtils.substringAfter(url, "?") : url;
		if (StringUtils.isBlank(query)) {
			return pairs;
		}
		String[] params = StringUtils.split(query, "&");
		if (ArrayUtils.isEmpty(params)) {
			return pairs;
		}
		for (String param : params) {
			int separatorIndex = param.indexOf('=');
			String name = separatorIndex < 0 ? param : param.substring(0, separatorIndex);
			String value = separatorIndex < 0 ? "" : param.substring(separatorIndex + 1);
			if (StringUtils.isEmpty(name)) {
				// "=value" carries no usable parameter name.
				continue;
			}
			pairs.add(new BasicNameValuePair(name, value));
		}
		return pairs;
	}

}
